	frames reset
	frame create stats_2005
	frame create stats_2017

	loc varlist "totw2comp ptotinc aginc top1"
	
	foreach year in 2005 2017 {
		foreach agerange in ageall age4055  {

			if "`agerange'"=="age4055" {
				use if inrange(age, 40, 55) & year== `year' using "${clean_data}/panel_physicians_clean.dta", clear
			}
				
			else if "`agerange'"=="ageall" {
				use if year== `year' using "${clean_data}/panel_physicians_clean.dta", clear
			}
	
			encode spec_category_desc , g(spec_category_num)
			levelsof spec_category_desc, loc(speclabs)
			loc specno =  `r(r)'
			assert `specno' == 9
			
			cap log close
			log using "${output}/descriptives/specialty_stats_`agerange'_`year'.txt", replace text 
						
			* Summarize variables of interest for each spec_category_code
			gstats tab `varlist', s(mean sd count) pretty by(spec_category_num) matasave(m`agerange')
			
			log close
			
			* Save as matrix
			mata: st_matrix("`agerange'", m`agerange'.output)
			
			* Add median
			loc rows = `: word count `varlist'' * `specno'
			mat median = J(`rows',2,.)
			loc j = 1
		
			loc perc = 50
			
			foreach spec in `speclabs' {
				
				preserve
				keep if spec_category_desc == "`spec'"
				
				foreach var of varl `varlist' {
					loc k = 1
					
					qui centile `var', centile(`=`perc'-0.1' `=`perc'+0.1')
					su `var' if `var' >= `r(c_1)' & `var' <= `r(c_2)'
					if `r(N)' < 11 {
						di "Less than 11 observations between 49.90th and 50.1st percentile for `var' in `spec', use xtile and incl. sufficient obs around percentile instead"
						
						xtile perc = `var', nq(100)
						g aux1 = perc == `perc'
						sort `var'
						g aux2 = _n
						count if aux1 == 1 
						if `r(N)' < 11 {
							loc diff = ceil((11-`r(N)')/2) + 1
							su aux2 if perc == `perc'
							replace aux1 = 1 if inrange(aux2,`=`r(min)' - `diff'',`=`r(min)' + `diff'')
						} 	
						su `var' if aux1 == 1 
						assert `r(N)' > 10
					}
					
					mat median[`j',1] = `r(mean)'
					mat median[`j',2] = `r(N)'
					loc ++j 
					cap drop aux* perc
				}
				restore
			}

			* Mean matrix to dta
			clear
			svmat `agerange' 
			g obsid = _n 
			g spec_category_num = ceil(obsid/`=wordcount("`varlist'")') 
			g spec_category_desc = "" 
			bys spec_category_num: g depno = _n 
			g depvar = "" 
			ren (`agerange'1 `agerange'2 `agerange'3) (mean stdev N_UR_mean_sd) 
			g sample = "`agerange'" 
			g year = `year' 
			tempfile means
			save `means' , replace	
			
			* Median matrix to dta
			clear 
			svmat median
			ren (median1 median2) (median N_UR_median) 
			g spec_category_num = ceil(_n/`=wordcount("`varlist'")') 
			g spec_category_desc = "" 
			bys spec_category_num: g depno = _n
			g depvar = "" 
			g sample = "`agerange'" 
			g year = `year' 
			
			merge 1:1 depno spec_category_num using  `means', assert(3) nogen
		
			tempfile `agerange'
			save ``agerange''
			
			frame stats_`year': append using ``agerange''
		}
	
		* Format data
		frame change stats_`year'

		foreach i of num 1(1)`=wordcount("`varlist'")' {
			replace depvar = "`:word `i' of `varlist''" if depno == `i'
		}
		
		foreach i of num 1(1)`specno' {
			replace spec_category_desc = "`:word `i' of `speclabs''" if spec_category_num == `i'
		}
	
		tempfile temp`year'
		save `temp`year''
	}
	
	use `temp2005', clear
	append using `temp2017'
	
	* Output 
	drbest_docinc mean median stdev
	keep if inlist(depvar,"ptotinc","aginc","totw2comp","top1")
	keep obsid mean median stdev spec_category_desc depvar depno sample N_UR* year
	order obsid spec_category_desc depvar depno year sample mean median stdev N_UR*
	keep if sample == "age4055"
	
	export delimited using "$mypath/intermediate_csv/desc05-specialty_stats.csv", replace dataf delim(tab)  

	* Exhibits with specialty characteristics

	frame create graphs
	foreach agerange in age4055  {

		use "${intermediate_data}/specialty_characteristics/cms_spec_characteristics_`agerange'.dta", clear

		drop if cms_spec_code=="00"
		replace ptotinc = ptotinc/1000
		
		collapse (mean) ptotinc resdur_allyrs resdur_before2010 wkh ///
			einsize samecmsspec_share female desirable_cz sd_ptotinc (rawsum) N_UR = freq (sum) freq [aw=freq], by( cms_spec_desc cms_spec_code)
			
		gen specialty_markers = cms_spec_desc
		drop cms_spec_desc

		gen pos = .
		replace pos = 12	if specialty_markers == "Dermatology" 
		replace pos = 12	if specialty_markers == "Neurosurgery"
		replace pos = 12 	if specialty_markers == "Radiology"
		replace pos = 12 	if specialty_markers == "Ophthalmology"
		replace pos = 12 	if specialty_markers == "Anesthesiology"
		replace pos = 6 	if specialty_markers == "Family Practice"
		replace pos = 12 	if specialty_markers == "Cardiac Surgery"
		replace pos= 0 		if specialty_markers == "Pediatric"
		replace pos= 3 		if specialty_markers == "Internal Medicine"
		replace pos = 12	if specialty_markers == "Orthopedic Surgery"
		
		g sample = "specialty_category; `agerange'"
		drbest_docinc freq 
		tempfile `agerange'
		save ``agerange''
		
		frame graphs: append using ``agerange''

		* Save regressions
		
		cap log close
		log using "${output}/descriptives/income_vs_covariate_cms_spec_regs`agerange'.txt", replace text 

		reg ptotinc wkh [aw = freq]
		reg ptotinc resdur_allyrs [aw = freq]
		reg ptotinc resdur_before2010 [aw = freq]
		reg ptotinc female [aw = freq]
		reg ptotinc einsize [aw = freq]

		log close
		
		* Income vs. share of US-trained physicians (NRMP specialty)
		
		use "${intermediate_data}/NRMP/specialty_applicant.dta", clear
		keep year nrmp_spec_desc n_match_sn n_match_other 
		isid year nrmp_spec_desc	
		gen total_matches=n_match_sn+n_match_other
		gen share_us_sn=n_match_sn/total_matches
		collapse(mean) share_us_sn [aw=total_matches], by(nrmp_spec_desc)
		isid nrmp_spec_desc
		save "${temp}/share_us_sn.dta", replace 
		
		use "${intermediate_data}/specialty_characteristics/nrmp_spec_characteristics_`agerange'.dta", clear
		drop if nrmp_spec_code==0	
		replace ptotinc = ptotinc/1000

		collapse (mean) ptotinc resdur_allyrs wkh (rawsum) N_UR = freq (sum) freq [aw=freq], by(nrmp_spec_code nrmp_spec_desc)
		merge 1:1 nrmp_spec_desc using "${temp}/share_us_sn.dta", keep(match) nogen
		
		* Residualize share US graduates and income
		
		reg share_us_sn resdur_allyrs wkh [aw = freq]
		predict re_share_us_sn, re
		sum share_us_sn
		gen pred_share_us_sn=re_share_us_sn+`r(mean)'
		
		reg ptotinc resdur_allyrs wkh [aw = freq]
		predict re_ptotinc, re
		sum ptotinc
		gen pred_ptotinc=re_ptotinc+`r(mean)'
		
		g sample = "NRMP; `agerange'"
		
		drbcount freq, replace
		tempfile `agerange'
		save ``agerange''
		
		frame graphs: append using ``agerange''
	
		capture log close
		log using "${output}/descriptives/income_vs_covariate_cms_spec_regs`agerange'.txt", append text 
		* NRMP specialties; residualized share US graduates and income. 
		* Covariates: resdur_allyrs wkh
		reg pred_share_us_sn pred_ptotinc [aw = freq]
		log close

	}
	
	* Format and export datasets
	frame change graphs 

	keep nrmp_spec_desc pred_share_us_sn pred_ptotinc sample /// 
	ptotinc wkh specialty_markers pos freq resdur_allyrs female einsize pred_share_us_sn N_UR ///
	einsize
	
	drbest_docinc pred_share_us_sn pred_ptotinc ptotinc wkh freq resdur_allyrs female einsize pred_share_us_sn einsize
	order N_UR, last
	
	export delimited using "${mypath}/intermediate_csv/desc05-specialty_graphs.csv", replace dataf delim(tab) 